home *** CD-ROM | disk | FTP | other *** search
- /* New master XFCN "zbrowser(...)"
- * copyright 1990 - Mark Zimmermann
- *
- * revised 900412 -- filter out ASCII 0-12,14-31, and 127 from returned text
- * so that highlighting when retrieving from non-TEXT files can work reliably
- *
- * For further information, write:
- * Mark ^Zimmermann
- * P.O. Box 8310
- * Silver Spring, MD 20907
- * USA
- *
- * Be sure to enclose a STAMPED, SELF-ADDRESSED ENVELOPE if you want
- * to receive a timely reply!
- *
- * Electronic addresses:
- * science@nems.dt.navy.mil
- * [75066,2044] CompuServe
- */
-
-
- /* ------------------------header files to include-------------- */
-
- #include <MacTypes.h>
- #include <FileMgr.h>
- #include <HyperXCmd.h>
- #include <SetupA4.h>
-
-
- /* ---------------declarations and definitions------------------- */
-
-
/* KEY_LENGTH -- how many letters of a word are kept in each index
 * record.  The value 28 has been used for the past year and was
 * chosen as optimal, so leave it alone unless there is a good
 * reason to change it!
 */
#define KEY_LENGTH  28
-
/* SUBSET_QUANTUM -- the size, in bytes, of the proximity neighborhood
 * of a word in the index; it is used when a subset is defined for
 * proximity searching.  Each bit of a subset's flag array stands for
 * SUBSET_QUANTUM bytes of the main file, so the main file is squeezed
 * down by an effective factor of SUBSET_QUANTUM * 8 into an array of
 * one-bit flags marking which regions of the database belong to the
 * current working subset.
 *
 * With SUBSET_QUANTUM = 32 (a nice choice) proximity comparisons are
 * made with a chunkiness of 32 characters and the compression factor
 * is 256.  On a typical 1 MB Mac with ~100kB free while running
 * HyperCard, that should permit subset browsing of databases up to
 * ~25 MB....
 */
#define SUBSET_QUANTUM  32
-
- /* structure of the records in the index key file:
- * a fixed-length character string, padded out with blanks and
- * containing the unique alphanumeric 'words' in the document
- * file, changed to all-capital letters;
- * a cumulative count of how many total occurrences of words, including
- * the current one, have appeared up to this point in the sorted
- * index.
- */
- typedef struct
- {
- char kkey[KEY_LENGTH];
- long ccount;
- } KEY_RECORD;
-
/* some symbolic values...
 *
 * Each name is #undef'd before it is defined so that an earlier
 * definition coming from an included header cannot provoke a macro
 * redefinition complaint.  NULL is deliberately kept as a plain
 * integer 0 (not a pointer-typed constant): this file compares both
 * Handles and integer file reference numbers against it.
 */
#undef TRUE
#define TRUE    1
#undef FALSE
#define FALSE   0
#undef NULL
#define NULL    0
-
-
- /* ---------------prototypes------------------- */
-
- pascal void main (XCmdBlockPtr paramPtr);
- void doAndSubsets (XCmdBlockPtr paramPtr);
- void doBooleanNotSubset (XCmdBlockPtr paramPtr);
- void doContext (XCmdBlockPtr paramPtr);
- void doEmptySubset (XCmdBlockPtr paramPtr);
- void doFillSubset (XCmdBlockPtr paramPtr);
- void doIndex (XCmdBlockPtr paramPtr);
- void doLocate (XCmdBlockPtr paramPtr);
- void doNewSubset (XCmdBlockPtr paramPtr);
- void doOrSubsets (XCmdBlockPtr paramPtr);
- void doReleaseSubset (XCmdBlockPtr paramPtr);
- void doSetSubsetBits (XCmdBlockPtr paramPtr);
- void doText (XCmdBlockPtr paramPtr);
- void returnErrorMsg (XCmdBlockPtr paramPtr, char *msg);
- void getKeyRecord (KEY_RECORD *keyRecp, long keyRecNum, int keyFileRefNum);
- long getTextPtr (long instanceNum, int ptrFileRefNum);
- void getContextLine (long bytes, long textPtr, int refNum, char *ansp);
- void buildIndexAnswer (char *ansp, int indexCountWidth, int indexKeyWidth,
- long count, char key[]);
- void buildSubIndexAnswer (char *ansp, int indexCountWidth,
- int indexKeyWidth, long maxIndexSampleCount, long prevCcount,
- long thisCcount, char key[], Handle subsetHandle, int ptrFileRefNum);
- int inSubset (long textPtr, Handle subsetHandle);
- void setSSBit (long textPtr, int setOrClear, Handle subsetHandle);
- char *strcpy (char *s1, char *s2);
- int strlen (char *s);
- int zstrcmp (unsigned char *s1, unsigned char *s2);
- long atol (char *s);
- void ltoaR (char *ansp, long n, int maxDigits);
-
-
- /* ------------------------main program-------------- */
-
-
- /* main routine, to dispatch control to a function
- * defined by the first letter of the first argument of the XFCN
- */
-
- pascal void main (paramPtr)
- XCmdBlockPtr paramPtr;
- {
- RememberA0 ();
- SetUpA4();
- switch (**(paramPtr->params[0]))
- {
- case 'A':
- doAndSubsets (paramPtr);
- break;
- case 'B':
- doBooleanNotSubset (paramPtr);
- break;
- case 'C':
- doContext (paramPtr);
- break;
- case 'E':
- doEmptySubset (paramPtr);
- break;
- case 'F':
- doFillSubset (paramPtr);
- break;
- case 'I':
- doIndex (paramPtr);
- break;
- case 'L':
- doLocate (paramPtr);
- break;
- case 'N':
- doNewSubset (paramPtr);
- break;
- case 'O':
- doOrSubsets (paramPtr);
- break;
- case 'R':
- doReleaseSubset (paramPtr);
- break;
- case 'S':
- doSetSubsetBits (paramPtr);
- break;
- case 'T':
- doText (paramPtr);
- break;
- default:
- returnErrorMsg (paramPtr,
- "{Sorry, unrecognized command in TEX zbrowser XFCN call!}");
- break;
- }
- RestoreA4();
- return;
- }
-
-
- /* ---------------------major functional units-------------- */
-
-
- /* function to logically AND two subsets, and put the resulting intersection
- * of sets into the first of the two
- *
- * ("ANDSUBSETS", subsetHandle1, subsetHandle2)
- * -- returns quietly with nothing if it successfully ANDs all bits
- * in the first subset flag array with the second array; beeps
- * and gives an error msg if it fails somehow...
- */
-
- void doAndSubsets (paramPtr)
- XCmdBlockPtr paramPtr;
- {
- long subsetSize;
- Handle subsetHandle1, subsetHandle2;
- register char *cp1, *cp2, *endOfSubset;
-
- if (paramPtr->paramCount != 3)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, wrong # of parameters in XFCN ANDSUBSETS call!}");
- return;
- }
-
- subsetHandle1 = (Handle) atol (*(paramPtr->params[1]));
- subsetHandle2 = (Handle) atol (*(paramPtr->params[2]));
-
- if (subsetHandle1 == NULL || subsetHandle2 == NULL)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, NULL subsetHandle in XFCN ANDSUBSETS call!}");
- return;
- }
-
- subsetSize = GetHandleSize (subsetHandle1);
- if (subsetSize != GetHandleSize (subsetHandle2))
- {
- returnErrorMsg (paramPtr,
- "{Sorry, inconsistent subsetHandle sizes in XFCN ANDSUBSETS call!}");
- return;
- }
- endOfSubset = *subsetHandle1 + subsetSize;
- for (cp1 = *subsetHandle1, cp2 = *subsetHandle2; cp1 < endOfSubset; ++cp1, ++cp2)
- *cp1 &= *cp2;
-
- return;
- }
-
-
- /* function to logically NOT a subset, inverting its contents
- *
- * ("BOOLEANNOTSUBSET", subsetHandle)
- * -- returns quietly with nothing if it successfully NOTs all bits
- * in the subset flag array; beeps
- * and gives an error msg if it fails somehow...
- */
-
- void doBooleanNotSubset (paramPtr)
- XCmdBlockPtr paramPtr;
- {
- long subsetSize;
- Handle subsetHandle;
- register char *cp, *endOfSubset;
-
- if (paramPtr->paramCount != 2)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, wrong # of parameters in XFCN BOOLEANNOTSUBSET call!}");
- return;
- }
-
- subsetHandle = (Handle) atol (*(paramPtr->params[1]));
-
- if (subsetHandle == NULL)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, NULL subsetHandle in XFCN BOOLEANNOTSUBSET call!}");
- return;
- }
-
- subsetSize = GetHandleSize (subsetHandle);
- endOfSubset = *subsetHandle + subsetSize;
- for (cp = *subsetHandle; cp < endOfSubset; ++cp)
- *cp = ~*cp;
-
- return;
- }
-
-
- /* function to create the context display...
- *
- * ("CONTEXT", instanceNum, contextLines, targetContextLine,
- * contextLineLength, contextWordOffset, maxContextLinesSkipped,
- * contextGutterWidth, ptrFileRefNum, textFileRefNum, subsetHandle)
- * -- returns with contextLines of display followed by contextLines
- * of instanceNum-textPtr pairs, with context instance instanceNum
- * on line targetContextLine; if contextGutterWidth is non-zero,
- * then that number of spaces are placed before the key word...
- */
-
- void doContext (paramPtr)
- XCmdBlockPtr paramPtr;
- {
- int contextLines, targetContextLine, contextLineLength,
- contextWordOffset, ptrFileRefNum, textFileRefNum, line,
- contextGutterWidth;
- long instanceNum, maxContextLinesSkipped, textPtr, j, *tempNum;
- Handle subsetHandle, answer, tempStor;
- char *ansp;
- register int i;
- register char *cp1, *cp2;
-
- if (paramPtr->paramCount != 11)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, wrong number of parameters in XFCN CONTEXT call!}");
- return;
- }
-
- instanceNum = atol (*(paramPtr->params[1]));
- contextLines = atol (*(paramPtr->params[2]));
- targetContextLine = atol (*(paramPtr->params[3]));
- contextLineLength = atol (*(paramPtr->params[4]));
- contextWordOffset = atol (*(paramPtr->params[5]));
- maxContextLinesSkipped = atol (*(paramPtr->params[6]));
- contextGutterWidth = atol (*(paramPtr->params[7]));
- ptrFileRefNum = atol (*(paramPtr->params[8]));
- textFileRefNum = atol (*(paramPtr->params[9]));
- subsetHandle = (Handle) atol (*(paramPtr->params[10]));
-
- if (instanceNum < 0 || contextLines < 1 || targetContextLine < 1 ||
- targetContextLine > contextLines ||
- contextLineLength < KEY_LENGTH + contextWordOffset ||
- contextWordOffset < 0 || maxContextLinesSkipped < 1 ||
- contextGutterWidth > contextWordOffset ||
- ptrFileRefNum == NULL || textFileRefNum == NULL)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, bad parameter in XFCN CONTEXT call!}");
- return;
- }
-
- if ((answer = NewHandle (contextLines * (contextLineLength + 26) + 1))
- == NULL)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, out of memory error in XFCN CONTEXT call!}");
- return;
- }
-
- /* tempStor is used to store values for instanceNums and textPtrs */
- if ((tempStor = NewHandle (contextLines * 2 * sizeof(long))) == NULL)
- {
- DisposHandle (answer);
- returnErrorMsg (paramPtr,
- "{Sorry, secondary out-of-memory error in XFCN CONTEXT call!}");
- return;
- }
-
- /* back up to the correct starting instanceNum */
- for (line = targetContextLine; line > 1; --line)
- {
- for (j = 0; j < maxContextLinesSkipped; ++j)
- {
- textPtr = getTextPtr (--instanceNum, ptrFileRefNum);
- if (subsetHandle == NULL || textPtr < 0 ||
- inSubset (textPtr, subsetHandle))
- break;
- }
- }
-
- HLock (answer);
- HLock (tempStor);
- ansp = *answer;
- tempNum = (long *) *tempStor;
-
- /* generate the lines of the context display, saving numbers */
- for (line = 0; line < contextLines; ++line)
- {
- for (j = 0; j < maxContextLinesSkipped; ++j, ++instanceNum)
- {
- textPtr = getTextPtr (instanceNum, ptrFileRefNum);
- if (textPtr < 0)
- break;
- if (subsetHandle == NULL ||
- inSubset (textPtr, subsetHandle))
- {
- getContextLine (contextLineLength,
- textPtr - contextWordOffset, textFileRefNum, ansp);
- if (contextGutterWidth > 0)
- {
- cp1 = ansp + contextLineLength - 1 - contextGutterWidth;
- cp2 = ansp + contextLineLength - 1;
- for (i = contextLineLength - contextWordOffset - contextGutterWidth;
- i > 0; --i)
- *cp2-- = *cp1--;
- for (i = contextGutterWidth; i > 0; --i)
- *cp2-- = ' ';
- }
- ansp += contextLineLength;
- break;
- }
- }
- if (j == maxContextLinesSkipped)
- {
- textPtr = -1;
- --instanceNum;
- for (i = contextLineLength; i > 0; --i)
- *ansp++ = '.';
- }
- tempNum[line] = instanceNum++;
- tempNum[line + contextLines] = textPtr;
- *ansp++ = '\r';
- }
-
- for (line = 0; line < contextLines; ++line)
- {
- ltoaR (ansp, tempNum[line], 12);
- ansp += 12;
- ltoaR (ansp, tempNum[line + contextLines], 12);
- ansp += 12;
- *ansp++ = '\r';
- }
-
- *ansp = '\0';
- HUnlock (answer);
- paramPtr->returnValue = answer;
- HUnlock (tempStor);
- DisposHandle (tempStor);
- return;
- }
-
-
- /* function to empty out a subset so that no words are in the valid
- * region:
- *
- * ("EMPTYSUBSET", subsetHandle)
- * -- returns quietly with nothing if it successfully sets all bits
- * in the subset flag array to zero; beeps and gives an error msg
- * if it fails somehow...
- */
-
- void doEmptySubset (paramPtr)
- XCmdBlockPtr paramPtr;
- {
- long subsetSize;
- Handle subsetHandle;
- register char *cp, *endOfSubset;
-
- if (paramPtr->paramCount != 2)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, wrong # of parameters in XFCN EMPTYSUBSET call!}");
- return;
- }
-
- subsetHandle = (Handle) atol (*(paramPtr->params[1]));
-
- if (subsetHandle == NULL)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, NULL subsetHandle in XFCN EMPTYSUBSET call!}");
- return;
- }
-
- subsetSize = GetHandleSize (subsetHandle);
- endOfSubset = *subsetHandle + subsetSize;
- for (cp = *subsetHandle; cp < endOfSubset; ++cp)
- *cp = 0x00;
-
- return;
- }
-
-
- /* function to fill a subset so that the entire database is in the
- * valid region:
- *
- * ("FILLSUBSET", subsetHandle)
- * -- returns quietly with nothing if it successfully sets all bits
- * in the subset flag array to one; beeps and gives an error msg
- * if failure...
- */
-
- void doFillSubset (paramPtr)
- XCmdBlockPtr paramPtr;
- {
- long subsetSize;
- Handle subsetHandle;
- register char *cp, *endOfSubset;
-
- if (paramPtr->paramCount != 2)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, wrong # of parameters in XFCN FILLSUBSET call!}");
- return;
- }
-
- subsetHandle = (Handle) atol (*(paramPtr->params[1]));
-
- if (subsetHandle == NULL)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, NULL subsetHandle in XFCN FILLSUBSET call!}");
- return;
- }
-
- subsetSize = GetHandleSize (subsetHandle);
- endOfSubset = *subsetHandle + subsetSize;
- for (cp = *subsetHandle; cp < endOfSubset; ++cp)
- *cp = 0xFF;
-
- return;
- }
-
-
- /* function to produce the index window display and associated info
- *
- * ("INDEX", wordNum, indexLines, maxIndexSampleCount, indexCountWidth,
- * indexKeyWidth, keyFileRefNum, ptrFileRefNum, subsetHandle)
- * -- returns with indexLines of index window display, followed by
- * indexLines of instanceNums. The index lines are:
- * indexCountWidth columns of occurrence count info (right-justified),
- * a blank column, and indexKeyWidth columns of keyWord (in all
- * caps, left-justified). Demand that indexCountWidth be at least
- * 5, to allow for subindex count display, and that indexKeyWidth
- * be in the range 1 through KEY_LENGTH = 28 ...
- */
-
- void doIndex (paramPtr)
- XCmdBlockPtr paramPtr;
- {
- KEY_RECORD thisRec, prevRec;
- register int i;
- int indexLines, keyFileRefNum, indexCountWidth, ptrFileRefNum,
- keyRecsFound, indexKeyWidth;
- long wordNum, maxIndexSampleCount;
- Handle subsetHandle, answer;
- char *ansp;
-
- if (paramPtr->paramCount != 9)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, wrong number of parameters in XFCN INDEX call!}");
- return;
- }
-
- wordNum = atol (*(paramPtr->params[1]));
- indexLines = atol (*(paramPtr->params[2]));
- maxIndexSampleCount = atol (*(paramPtr->params[3]));
- indexCountWidth = atol (*(paramPtr->params[4]));
- indexKeyWidth = atol (*(paramPtr->params[5]));
- keyFileRefNum = atol (*(paramPtr->params[6]));
- ptrFileRefNum = atol (*(paramPtr->params[7]));
- subsetHandle = (Handle) atol (*(paramPtr->params[8]));
-
- if (wordNum < 0 || indexLines < 1 || maxIndexSampleCount < 1 ||
- indexCountWidth < 5 || indexKeyWidth < 1 ||
- indexKeyWidth > KEY_LENGTH || keyFileRefNum == 0 ||
- ptrFileRefNum == 0)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, bad parameter in XFCN INDEX call!}");
- return;
- }
-
-
- if ((answer = NewHandle (indexLines *
- (indexCountWidth + indexKeyWidth + 15) + 1)) == NULL)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, out of memory error in XFCN INDEX call!}");
- return;
- }
-
- HLock (answer);
- ansp = *answer;
-
- getKeyRecord (&prevRec, wordNum - 1, keyFileRefNum);
-
- for (i = 0; i < indexLines; ++i)
- {
- getKeyRecord (&thisRec, wordNum + i, keyFileRefNum);
- if (thisRec.ccount == 0)
- break;
-
- if (subsetHandle == NULL)
- buildIndexAnswer (ansp, indexCountWidth, indexKeyWidth,
- thisRec.ccount - prevRec.ccount, thisRec.kkey);
- else
- buildSubIndexAnswer (ansp, indexCountWidth, indexKeyWidth,
- maxIndexSampleCount, prevRec.ccount, thisRec.ccount,
- thisRec.kkey, subsetHandle, ptrFileRefNum);
-
- ansp += indexCountWidth + indexKeyWidth + 2;
- prevRec.ccount = thisRec.ccount;
- }
-
- keyRecsFound = i;
- for (i = keyRecsFound; i < indexLines; ++i)
- *ansp++ = '\r';
-
- for (i = 0; i < keyRecsFound; ++i)
- {
- getKeyRecord (&thisRec, wordNum + i - 1, keyFileRefNum);
- ltoaR (ansp, thisRec.ccount, 12);
- ansp += 12;
- *ansp++ = '\r';
- }
-
- for (i = keyRecsFound; i < indexLines; ++i)
- *ansp++ = '\r';
-
- *ansp = '\0';
- HUnlock (answer);
- paramPtr->returnValue = answer;
- return;
- }
-
-
- /* function to find a chosen string in the index key file (just do a
- * binary search to locate it):
- *
- * ("LOCATE", targetString, keyFileRefNum)
- * -- returns wordNum for the targetString if it is found in the
- * key file; otherwise returns wordNum for the word alphabetically
- * preceding targetString followed by "{targetString not found!}"
- * on the second line of the answer...
- *
- */
-
- void doLocate (paramPtr)
- XCmdBlockPtr paramPtr;
- {
- register int i, c;
- int keyFileRefNum, diff;
- char *cp;
- register long mid;
- long low, high, keyFileSize;
- KEY_RECORD thisRec, targetRec;
- Handle answer;
-
- if (paramPtr->paramCount != 3)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, wrong number of parameters in XFCN LOCATE call!}");
- return;
- }
-
- keyFileRefNum = atol (*(paramPtr->params[2]));
-
- if (keyFileRefNum == NULL)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, NULL keyFileRefNum error in XFCN LOCATE call!}");
- return;
- }
-
- cp = *(paramPtr->params[1]);
- for (i = 0; i < KEY_LENGTH; ++i)
- {
- c = *cp;
- if (c == '\0')
- {
- targetRec.kkey[i] = ' ';
- continue;
- }
- if (c >= 'a' && c <= 'z')
- c = c - 'a' + 'A';
- targetRec.kkey[i] = c;
- ++cp;
- }
-
- low = 0;
- GetEOF (keyFileRefNum, &keyFileSize);
- high = keyFileSize / sizeof (KEY_RECORD) - 1;
-
- while (low <= high)
- {
- mid = (low + high) / 2;
- getKeyRecord (&thisRec, mid, keyFileRefNum);
- if (thisRec.ccount == 0)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, possible file I/O error in XFCN LOCATE call!}");
- return;
- }
- diff = zstrcmp ((unsigned char *)targetRec.kkey,
- (unsigned char *)thisRec.kkey);
- if (diff < 0)
- high = mid - 1;
- else if (diff > 0)
- low = mid + 1;
- else
- break;
- }
-
- if (diff < 0)
- --mid;
- if (mid < 0)
- mid = 0;
-
- if ((answer = NewHandle (64)) == NULL)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, out of memory error in XFCN LOCATE call!}");
- return;
- }
- ltoaR (*answer, mid, 12);
- *(*answer + 12) = '\0';
- if (diff != 0)
- strcpy (*answer + 12, "\r{target string not found!}");
- paramPtr->returnValue = answer;
- return;
- }
-
-
- /* function to create a new subset:
- *
- * ("NEWSUBSET", textFileRefNum)
- * -- returns subsetHandle for a new subset that it creates, big
- * enough to do subset browsing -- but does NOT initialize that
- * subset or check to see whether another subset already
- * exists. Beeps and gives error msg if it fails...
- */
-
- void doNewSubset (paramPtr)
- XCmdBlockPtr paramPtr;
- {
- int textFileRefNum;
- long textFileSize, subsetSize;
- Handle subsetHandle, answer;
-
- if (paramPtr->paramCount != 2)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, wrong # of parameters in XFCN NEWSUBSET call!}");
- return;
- }
-
- textFileRefNum = atol (*(paramPtr->params[1]));
- if (textFileRefNum == NULL ||
- GetEOF (textFileRefNum, &textFileSize) != noErr)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, file error in XFCN NEWSUBSET call!}");
- return;
- }
-
- subsetSize = 1 + textFileSize / (SUBSET_QUANTUM * 8);
-
- if ((subsetHandle = NewHandle (subsetSize)) == NULL)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, not enough memory for subset creation!}");
- return;
- }
-
- if ((answer = NewHandle (16)) == NULL)
- {
- DisposHandle (subsetHandle);
- returnErrorMsg (paramPtr,
- "{Sorry, out of memory error in XFCN NEWSUBSET call!}");
- return;
- }
- ltoaR (*answer, (long)subsetHandle, 12);
- *(*answer + 12) = '\0';
- paramPtr->returnValue = answer;
-
- return;
-
- }
-
-
- /* function to logically OR two subsets, and put the resulting union
- * of sets into the first of the two
- *
- * ("ORSUBSETS", subsetHandle1, subsetHandle2)
- * -- returns quietly with nothing if it successfully ORs all bits
- * in the first subset flag array with the second array; beeps
- * and gives an error msg if it fails somehow...
- */
-
- void doOrSubsets (paramPtr)
- XCmdBlockPtr paramPtr;
- {
- long subsetSize;
- Handle subsetHandle1, subsetHandle2;
- register char *cp1, *cp2, *endOfSubset;
-
- if (paramPtr->paramCount != 3)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, wrong # of parameters in XFCN ORSUBSETS call!}");
- return;
- }
-
- subsetHandle1 = (Handle) atol (*(paramPtr->params[1]));
- subsetHandle2 = (Handle) atol (*(paramPtr->params[2]));
-
- if (subsetHandle1 == NULL || subsetHandle2 == NULL)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, NULL subsetHandle in XFCN ORSUBSETS call!}");
- return;
- }
-
- subsetSize = GetHandleSize (subsetHandle1);
- if (subsetSize != GetHandleSize (subsetHandle2))
- {
- returnErrorMsg (paramPtr,
- "{Sorry, inconsistent subsetHandle sizes in XFCN ORSUBSETS call!}");
- return;
- }
- endOfSubset = *subsetHandle1 + subsetSize;
- for (cp1 = *subsetHandle1, cp2 = *subsetHandle2; cp1 < endOfSubset; ++cp1, ++cp2)
- *cp1 |= *cp2;
-
- return;
- }
-
-
- /* routine to get rid of a subset and release that memory:
- *
- * ("RELEASESUBSET", subsetHandle)
- * -- returns quietly with nothing if successful in releasing the
- * subsetHandle, or noisily with an error message if it fails...
- */
-
- void doReleaseSubset (paramPtr)
- XCmdBlockPtr paramPtr;
- {
- Handle subsetHandle;
-
- if (paramPtr->paramCount != 2)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, wrong # of params in XFCN RELEASESUBSET call!}");
- return;
- }
-
- subsetHandle = (Handle) atol (*(paramPtr->params[1]));
- if (subsetHandle == NULL)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, NULL subsetHandle in XFCN RELEASESUBSET call!}");
- return;
- }
-
- DisposHandle (subsetHandle);
- return;
- }
-
-
- /* function to turn on or off bits in a subset according to their
- * proximity to a given word's occurrences:
- *
- * ("SETSUBSETBITS", wordNum, neighborhoodSize, setOrClear,
- * keyFileRefNum, ptrFileRefNum, subsetHandle)
- * -- returns quietly with nothing if it is successful in setting or
- * clearing (depending on setOrClear's value, 0 or non-0) the
- * bits in the subset flag array in the neighborhood of the
- * chosen word(s); gives an error msg if there was a problem.
- * neighborhoodSize is in characters and is used to determine
- * how many bits to set/clear on each side of the instances...
- */
-
- void doSetSubsetBits (paramPtr)
- XCmdBlockPtr paramPtr;
- {
- long wordNum, neighborhoodSize, bitsToSet, maxTextPtr, instance,
- tp0, tp, tpMax;
- int setOrClear, keyFileRefNum, ptrFileRefNum;
- Handle subsetHandle;
- KEY_RECORD prevRec, thisRec;
-
- if (paramPtr->paramCount != 7)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, wrong # of parameters in XFCN SETSUBSET call!}");
- return;
- }
-
-
- wordNum = atol (*(paramPtr->params[1]));
- neighborhoodSize = atol (*(paramPtr->params[2]));
- setOrClear = atol (*(paramPtr->params[3]));
- keyFileRefNum = atol (*(paramPtr->params[4]));
- ptrFileRefNum = atol (*(paramPtr->params[5]));
- subsetHandle = (Handle) atol (*(paramPtr->params[6]));
-
- if (wordNum < 0 || neighborhoodSize < 1 ||
- keyFileRefNum == NULL || ptrFileRefNum == NULL ||
- subsetHandle == NULL)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, bad parameter in XFCN SETSUBSET call!}");
- return;
- }
-
- bitsToSet = (neighborhoodSize * 2) / SUBSET_QUANTUM + 1;
- maxTextPtr = GetHandleSize (subsetHandle) * SUBSET_QUANTUM * 8;
- getKeyRecord (&prevRec, wordNum - 1, keyFileRefNum);
- getKeyRecord (&thisRec, wordNum, keyFileRefNum);
-
- for (instance = prevRec.ccount; instance < thisRec.ccount; ++instance)
- {
- tp0 = getTextPtr (instance, ptrFileRefNum);
- if (tp0 < 0)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, getTextPtr I/O error in XFCN SETSUBSET call!}");
- return;
- }
- tp = tp0 - (bitsToSet / 2) * SUBSET_QUANTUM;
- tpMax = tp + bitsToSet * SUBSET_QUANTUM;
- if (tp < 0)
- tp = 0;
- if (tpMax > maxTextPtr)
- tpMax = maxTextPtr;
- for ( ; tp < tpMax; tp += SUBSET_QUANTUM)
- setSSBit (tp, setOrClear, subsetHandle);
- }
-
- return;
- }
-
-
- /* function to grab a chunk of text:
- *
- * ("TEXT", textPtr, textChunkSize, textOffset, textFileRefNum)
- * -- returns with (if possible; see below)
- * textChunkSize bytes of text from the text file,
- * starting at byte number textPtr-textOffset+1 and ending
- * just before byte number textPtr-textOffset+textChunkSize+1.
- * (The '+1' is to match up with HyperCard's 1-based counting
- * convention, rather than the 0-based C convention!!)
- * If the file isn't big enough or if textPtr is too near the
- * beginning or end of the file, cut off the retrieved text
- * at that boundary and insert the words {beginning of database}
- * or {end of database}. FILTER OUT ANY '\0' characters in
- * the text that is returned, to avoid problems; also
- * filter out any tabs, since HC mistreats them in its display;
- * and turn any linefeeds ('\n' = 0x0A) to returns ('\r' = 0x0D),
- * for compatibility in reading indexed files from UNIX hosts.
- * ((mod 900412 -- filter out any controls: 0-12,14-31,127))
- * Restrict textChunkSize to <32000 bytes. After the text, on
- * a separate line, return three numbers: the byte number of
- * the first char returned relative to the beginning of the text
- * file, the actual offset within the characters returned
- * of the originally-requested textPtr, and the byte number
- * of the character after the last char returned relative to
- * the beginning of the text file.
- */
-
- void doText (paramPtr)
- XCmdBlockPtr paramPtr;
- {
- int textFileRefNum;
- long textPtr, textChunkSize, textOffset, textFileSize, startText,
- endText, count;
- Handle answer;
- register char *ansp, *cp;
-
- if (paramPtr->paramCount != 5)
- {
- returnErrorMsg (paramPtr,
- "Sorry, wrong number of parameters in XFCN TEXT call!}");
- return;
- }
-
- textPtr = atol (*(paramPtr->params[1]));
- textChunkSize = atol (*(paramPtr->params[2]));
- textOffset = atol (*(paramPtr->params[3]));
- textFileRefNum = atol (*(paramPtr->params[4]));
-
- GetEOF (textFileRefNum, &textFileSize);
-
- if (textPtr < 0 || textPtr > textFileSize || textOffset < 1 ||
- textOffset > textChunkSize || textChunkSize < 1 ||
- textChunkSize > 32000 || textFileRefNum == 0)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, bad parameter in XFCN TEXT call!}");
- return;
- }
-
- startText = textPtr - textOffset + 1;
- if (startText < 0)
- startText = 0;
- endText = textPtr + textChunkSize - textOffset + 1;
- if (endText > textFileSize)
- endText = textFileSize;
-
- count = endText - startText;
- if ((answer = NewHandle (count + 80)) == NULL)
- {
- returnErrorMsg (paramPtr,
- "{Sorry, out of memory error in XFCN TEXT call!}");
- return;
- }
-
- HLock (answer);
- ansp = *answer;
- if (startText == 0)
- {
- strcpy (ansp, "{beginning of database}\r");
- ansp += strlen ("{beginning of database}\r");
- textOffset = textPtr + strlen ("{beginning of database}\r") + 1;
- }
-
- if (SetFPos (textFileRefNum, fsFromStart, startText) != noErr ||
- FSRead (textFileRefNum, &count, ansp) != noErr)
- {
- SysBeep (10);
- strcpy (ansp,
- "{Sorry, file I/O error in XFCN TEXT call!}");
- HUnlock (answer);
- paramPtr->returnValue = answer;
- return;
- }
-
- cp = ansp;
- ansp += count;
-
- for ( ; cp < ansp; ++cp)
- {
- if (*cp == '\n' && *(cp-1) == '\r')
- *cp = ' ';
- if (*cp == '\n')
- *cp = '\r';
- if ((*cp >= 0 && *cp < '\r') || (*cp > '\r' && *cp < ' ') || *cp == 127)
- *cp = ' ';
- }
-
- *ansp++ = '\r';
- if (endText == textFileSize)
- {
- strcpy (ansp, "{end of database}\r");
- ansp += strlen ("{end of database}\r");
- }
- ltoaR (ansp, startText, 12);
- ansp += 12;
- ltoaR (ansp, textOffset, 12);
- ansp += 12;
- ltoaR (ansp, endText, 12);
- ansp += 12;
- *ansp = '\0';
- HUnlock (answer);
- paramPtr->returnValue = answer;
- return;
- }
-
-
-
- /* ------------------------support routines-------------- */
-
-
- /* function to set the return value of the XFCN to a chosen error msg;
- * if there isn't enough free memory to give us a Handle to the msg,
- * beep a bunch and then return!
- */
-
- void returnErrorMsg (paramPtr, msg)
- XCmdBlockPtr paramPtr;
- char *msg;
- {
- Handle answer;
- int msgLength;
-
- SysBeep (10);
- msgLength = strlen (msg);
- if ((answer = NewHandle (1 + msgLength)) == NULL)
- {
- SysBeep (10);
- SysBeep (10);
- SysBeep (10);
- SysBeep (10);
- SysBeep (10);
- return;
- }
-
- strcpy (*answer, msg);
- paramPtr->returnValue = answer;
- return;
- }
-
-
- /* function to fetch an index key record from the key file; if an
- * illegal keyRecNum is asked for, or if any sort of I/O error is
- * reported by SetFPos() or FSRead(), return 0 ccount and blank kkey....
- */
-
- void getKeyRecord (keyRecp, keyRecNum, keyFileRefNum)
- KEY_RECORD *keyRecp;
- long keyRecNum;
- int keyFileRefNum;
- {
- long count;
- register int i;
-
- count = sizeof(KEY_RECORD);
-
- if (keyRecNum < 0 ||
- SetFPos (keyFileRefNum, fsFromStart,
- keyRecNum * sizeof(KEY_RECORD)) != noErr ||
- FSRead (keyFileRefNum, &count, keyRecp) != noErr)
- {
- for (i = 0; i < KEY_LENGTH; ++i)
- keyRecp->kkey[i] = ' ';
- keyRecp->ccount = 0;
- }
-
- return;
- }
-
- /* function to fetch the value of the nth ptr from file ptrFileRefNum;
- * return illegal value (-1) for result if something goes wrong....
- */
-
- long getTextPtr (n, ptrFileRefNum)
- long n;
- int ptrFileRefNum;
- {
- long bytes = sizeof(long), result;
-
- if (SetFPos (ptrFileRefNum, fsFromStart, n * sizeof(long)) != noErr ||
- FSRead (ptrFileRefNum, &bytes, &result) != noErr)
- return (-1);
-
- return (result);
- }
-
-
- /* function to fetch a filtered line of text from the file ... fill
- * in with blanks if try to fetch from before the beginning of the
- * file or after the end of the file ... filter all control characters
- * by turning them into spaces ....
- *
- * modified 880917 to allow display of accented, etc. characters, by fixing
- * the test in the final 'if' statement to work properly with 'signed'
- * characters, and modified to retrieve properly at end of database
- */
-
- void getContextLine (bytes, start, refNum, ans)
- int refNum;
- long bytes, start;
- register char *ans;
- {
- register int i = 0;
- int errno;
- long origbytes;
-
- origbytes = bytes;
- if (start < 0)
- for (i = 0; i < -start; ++i)
- ans[i] = ' ';
- bytes -= i;
-
- if (SetFPos (refNum, fsFromStart, start + i) != noErr ||
- ((errno = FSRead (refNum, &bytes, ans + i)) != noErr &&
- errno != eofErr))
- {
- if (origbytes >
- strlen ("{Sorry, file I/O error in XFCN CONTEXT call!}"))
- {
- strcpy (ans, "{Sorry, file I/O error in XFCN CONTEXT call!}");
- bytes = origbytes;
- }
- else
- SysBeep (10);
- }
-
- if (bytes + i < origbytes)
- for (i += bytes; i < origbytes; ++i)
- ans[i] = ' ';
-
- for (i = 0; i < origbytes; ++i)
- if ((ans[i] >= 0 && ans[i] < 32) || ans[i] == 127)
- ans[i] = ' ';
-
- return;
- }
-
-
/* Format one index line at ansp: the count, right-justified in cwidth
 * columns, then a single space, then the first kwidth characters of
 * the key word, then a '\r'.  cwidth + kwidth + 2 characters are
 * written in all, and no '\0' is appended.
 */

void buildIndexAnswer (ansp, cwidth, kwidth, count, key)
    char *ansp, *key;
    long count;
    int cwidth, kwidth;
{
    register int left;
    register char *out;

    ltoaR (ansp, count, cwidth);

    out = ansp + cwidth;
    *out++ = ' ';
    for (left = kwidth; left > 0; --left)
        *out++ = *key++;
    *out = '\r';

    return;
}
-
-
- /* function to format an index record when working in a subset; like
- * buildIndexAnswer function above, but with information about how many
- * instances of each word are in the working subset. Specifically,
- * give a percentage estimate based on the last maxIndexSampleCount
- * instances for a word that occurs more than maxIndexSampleCount times
- * (e.g., " ~37% "), and for less frequently occurring words give the
- * actual fraction of valid/total instances (e.g., " 17/49 ").
- */
-
- void buildSubIndexAnswer (ansp, cwidth, kwidth, maxSample,
- prevCcount, thisCcount, key, subsetHandle, ptrFileRefNum)
- char *ansp, *key;
- int cwidth, kwidth, ptrFileRefNum;
- long maxSample, prevCcount, thisCcount;
- Handle subsetHandle;
- {
- long startCcount, instance, goodInstances;
- register int i;
- int goodPercent, subWidth;
-
- if (thisCcount - prevCcount > maxSample)
- startCcount = thisCcount - maxSample;
- else
- startCcount = prevCcount;
-
- goodInstances = 0;
- for (instance = startCcount; instance < thisCcount; ++instance)
- if (inSubset (getTextPtr (instance, ptrFileRefNum),
- subsetHandle))
- ++goodInstances;
-
- if (thisCcount - prevCcount > maxSample)
- {
- goodPercent = (100 * goodInstances) / (thisCcount - startCcount);
- *ansp++ = '~';
- ltoaR (ansp, goodPercent, 3);
- ansp += 3;
- *ansp++ = '%';
- for (i = 5; i < cwidth; ++i)
- *ansp++ = ' ';
- }
- else
- {
- subWidth = (cwidth - 1) / 2;
- ltoaR (ansp, goodInstances, subWidth);
- ansp += subWidth;
- *ansp++ = '/';
- ltoaR (ansp, thisCcount - prevCcount, subWidth);
- ansp += subWidth;
- for (i = 2 * subWidth + 1; i < cwidth; ++i)
- *ansp++ = ' ';
- }
-
- *ansp++ = ' ';
- for (i = 0; i < kwidth; ++i)
- *ansp++ = *key++;
- *ansp = '\r';
-
- return;
- }
-
-
- /* function to determine if a given textPtr is in the subset of
- * interest ... do it by a simple computation and look-up in the
- * bit array of subset flags...
- */
-
- int inSubset (textPtr, subsetHandle)
- long textPtr;
- Handle subsetHandle;
- {
- int bitNum;
- long byteNum;
-
- bitNum = (textPtr % (8 * SUBSET_QUANTUM)) / SUBSET_QUANTUM;
- byteNum = textPtr / (8 * SUBSET_QUANTUM);
-
- return (((*subsetHandle)[byteNum] >> bitNum) & 1);
- }
-
-
- /* function to set or clear a bit in the subset array...
- */
-
- void setSSBit (textPtr, setOrClear, subsetHandle)
- long textPtr;
- int setOrClear;
- Handle subsetHandle;
- {
- int bitNum;
- long byteNum;
-
- bitNum = (textPtr % (8 * SUBSET_QUANTUM)) / SUBSET_QUANTUM;
- byteNum = textPtr / (8 * SUBSET_QUANTUM);
-
- if (setOrClear)
- *(*subsetHandle + byteNum) |= 1 << bitNum;
- else
- *(*subsetHandle + byteNum) &= ~(1 << bitNum);
-
- return;
- }
-
-
/* Copy the NUL-terminated string s2, terminator included, to s1 and
 * return s1.  The destination must be big enough; no length check is
 * made.  (After K&R p.101.)
 */
char *strcpy (s1, s2)
  register char *s1, *s2;
{
  register char *dst;
  register char c;

  dst = s1;
  do
    {
      c = *s2++;
      *dst++ = c;
    }
  while (c != '\0');

  return (s1);
}
-
-
/* Return the number of characters in the string s, not counting the
 * terminating NUL.  (After K&R p.98.)
 */
int strlen (s)
  register char *s;
{
  register char *end;

  /* walk to the terminator, then measure the distance covered */
  for (end = s; *end != '\0'; ++end)
    ;

  return (end - s);
}
-
- /* my function to compare two strings and give a result as to who is
- * alphabetically earlier. Note that this is almost the same as strncmp()
- * with the fixed value of KEY_LENGTH as the maximum comparison distance,
- * except that I must be sure to handle the non-ASCII funny letters in
- * the Apple character set properly/consistently ... hence the need to
- * declare s1 and s2 to be type unsigned char *...
- */
-
- int zstrcmp (s1, s2)
- register unsigned char *s1, *s2;
- {
- register int n = KEY_LENGTH;
-
- for (; --n && *s1 == *s2; s1++, s2++)
- if (!*s1)
- break;
-
- return (*s1 - *s2);
- }
-
-
/* Convert the decimal number at the start of the string s to a long.
 *
 * Leading blanks (the space character only) are skipped, one optional
 * '+' or '-' sign is accepted, and digits are then consumed up to the
 * first character that is not '0'..'9'.  A string with no digits gives
 * 0.  Overflow is not detected.  (After K&R, but without isspace() and
 * isdigit().)
 */
long atol (s)
  register char *s;
{
  register long value;
  int negative;

  value = 0;
  negative = 0;

  for (; *s == ' '; ++s)
    ;

  if (*s == '-' || *s == '+')
    negative = (*s++ == '-');

  for (; *s >= '0' && *s <= '9'; ++s)
    value = value * 10 + (*s - '0');

  if (negative)
    return (-value);
  return (value);
}
-
-
/* Store the decimal form of n right-justified in the maxDigits
 * characters starting at ansp, blank-filled on the left.  No
 * terminating NUL is written.  Based on the itoa() example of K&R p.60.
 *
 *   ansp       start of the field to fill
 *   n          the number to format
 *   maxDigits  width of the field; if it is zero or negative there is
 *              no field and nothing at all is written
 *
 * Error handling: when the field is too narrow, the low-order digits
 * that fit are kept and the leading place is overwritten with a marker:
 *
 *   '>'  digits of a positive number did not fit
 *   '<'  digits of a negative number did not fit
 *   '^'  the digits of a negative number fit, but not its '-' sign
 */
void ltoaR (ansp, n, maxDigits)
  register char *ansp;
  register long n;
  int maxDigits;
{
  register int i;
  register unsigned long mag;
  int negative;

  /* with no room at all, the first digit would land before the field */
  if (maxDigits <= 0)
    return;

  /* take the magnitude in unsigned arithmetic, so that even the most
   * negative long (which has no positive counterpart) comes out right
   */
  negative = (n < 0);
  mag = (unsigned long) n;
  if (negative)
    mag = (unsigned long) 0 - mag;

  /* generate digits from the right-hand end of the field */
  i = maxDigits - 1;
  do
    {
      ansp[i--] = (char) (mag % 10 + '0');
    }
  while ((mag /= 10) > 0 && i >= 0);

  if (i < 0 && mag > 0) /* ran out of room with digits still to go */
    {
      if (negative)
        ansp[0] = '<';  /* negative overflow signal */
      else
        ansp[0] = '>';  /* positive overflow signal */
    }
  else
    {
      if (negative)
        {
          if (i >= 0)
            ansp[i--] = '-';
          else
            ansp[0] = '^';      /* no room for '-' sign signal */
        }
      for ( ; i >= 0; --i)
        ansp[i] = ' ';
    }

  return;
}
-
-